This Rmarkdown file shows the code we used to analyze demographic change over time in and outside of historic districts.

knitr::opts_chunk$set(echo = TRUE, message=F, warning=F, fig.width = 11.5, fig.height = 6.5)
library(readr)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(leaflet)
library(rstudioapi)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Set working directory to the place this script is saved:

# Make the relative data paths below resolve against the folder that holds
# this script. The RStudio API is only reachable from an interactive RStudio
# session, and it reports an empty path for a document that was never saved,
# so both cases are guarded; otherwise the working directory is left alone
# (knitr already evaluates chunks from the document's own folder by default).
current_path <- ""
if (rstudioapi::isAvailable()) {
  current_path <- rstudioapi::getActiveDocumentContext()$path
}
if (nzchar(current_path)) {
  setwd(dirname(current_path))
}

Load our data. We’ll first do the analysis using Census tracts, and then later use Census blocks.

# Table with the dates the historic districts (HDs) were designated.
# Source: https://planning.dc.gov/page/dc-historic-districts
# The commented-out call reads the table from a Google Sheet instead of the
# local CSV (presumably the online copy of the same data -- verify before
# switching back to it):
# hd_data <- readr::read_csv("https://docs.google.com/spreadsheets/d/1Ajl1iAS0NRB7vk_UFDveeWzGkwf3tuiDo-zV9_wtzRM/gviz/tq?tqx=out:csv&sheet=data")
hd_data <- readr::read_csv("hd_data/data.csv")

# Historic district boundary polygons.
# Source: https://opendata.dc.gov/datasets/DCGIS::historic-districts/about
hd_shp <- sf::st_read("Historic_Districts/Historic_Districts.shp")
## Reading layer `Historic_Districts' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\Historic_Districts\Historic_Districts.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 73 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4696608 xmax: -8564736 ymax: 4720410
## Projected CRS: WGS 84 / Pseudo-Mercator
# Ward boundary polygons (the wards in effect from 2022).
# Source: https://opendata.dc.gov/datasets/DCGIS::wards-from-2022/about
ward_shp <- sf::st_read("Wards_from_2022/Wards_from_2022.shp")
## Reading layer `Wards_from_2022' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\Wards_from_2022\Wards_from_2022.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 20 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# Zoning polygons under the Zoning Regulations of 2016; the ZR16 column of
# this layer is what the zone filtering further down works on.
# Source: https://opendata.dc.gov/datasets/DCGIS::zoning-boundaries-zoning-regulations-of-2016/about
zone_shp <- sf::st_read("zoning/Zoning_Boundaries_(Zoning_Regulations_of_2016).shp")
## Reading layer `Zoning_Boundaries_(Zoning_Regulations_of_2016)' from data source 
##   `C:\Users\edwar\Documents\GitHub\hd_analysis\zoning\Zoning_Boundaries_(Zoning_Regulations_of_2016).shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 953 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8584936 ymin: 4691870 xmax: -8561487 ymax: 4721094
## Projected CRS: WGS 84 / Pseudo-Mercator
# Read one census year's tract boundaries and give them a common set of columns.
# Column names differ from one census year to the next; see for example
# https://opendata.dc.gov/datasets/DCGIS::census-tracts-in-1970/about
# for more details on the variable names.
load_clean_tracts <- function(geo_id_var, black_var, white_var, totpop_var, year) {
  # geo_id_var, black_var, white_var, totpop_var: names (strings) of the
  #   shapefile columns that hold the tract ID and the Black, white and total
  #   population counts
  # year: census year; picks the folder/file to read and is stored as a column
  # Returns an sf object in EPSG:4326 with the columns year, geo_id, n_tot,
  #   n_black, n_white, n_other, geo_area_meters and geometry.
  layer_name <- paste0("Census_Tracts_in_", year)
  tracts <- sf::st_read(paste0("tract_data/", layer_name, "/", layer_name, ".shp"))

  # Standardise the column names, then keep only the ID and the counts.
  tracts <- rename(tracts,
                   geo_id = !!sym(geo_id_var),
                   n_black = !!sym(black_var),
                   n_white = !!sym(white_var),
                   n_tot = !!sym(totpop_var))
  tracts <- select(tracts, "geo_id", starts_with("n_"))
  tracts <- mutate(tracts,
                   n_other = n_tot - (n_black + n_white),
                   year = year)

  # NOTE(review): the area is measured in the shapefile's native CRS, i.e.
  # before the reprojection on the next line. If that CRS is Web Mercator the
  # figure will not match areas measured on the reprojected layer -- confirm.
  tracts$geo_area_meters <- sf::st_area(tracts)
  tracts <- sf::st_transform(tracts, 4326)

  select(tracts,
         "year", "geo_id", "n_tot", "n_black", "n_white", "n_other",
         "geo_area_meters", "geometry")
}
# One tract layer per census year. Arguments, in order: the ID column, the
# Black population column, the white population column and the total
# population column (each named as it appears in that year's shapefile),
# followed by the census year.
t60_shp <- load_clean_tracts("GISJOIN", "B58013", "B58011", "CA4001", 1960)
t70_shp <- load_clean_tracts("GISJOIN", "CEB03", "CEB01", "CY7001", 1970) 
t80_shp <- load_clean_tracts("GISJOIN", "C9D003", "C9D001", "C7L001", 1980)
t90_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "POPULATION", 1990)
t00_shp <- load_clean_tracts("TRACTNO", "BLACK", "WHITE", "TOTAL", 2000)
t10_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2010)
t20_shp <- load_clean_tracts("TRACT", "P0010004", "P0010003", "P0010001", 2020)
# run garbage collection after loading the seven layers
gc()

Merge historic district (HD) data onto the HD shapefile, then subset to only look at neighborhood HDs:

# Attach the designation data to each district polygon, matched on UNIQUEID.
hd_shp <- dplyr::left_join(x = hd_shp, y = hd_data, by = "UNIQUEID")
# Keep only the neighborhood HDs. `%in%` treats a missing flag as "not a
# neighborhood HD", whereas indexing with `== 1` would turn every NA flag
# into an all-NA row that then flows into the areas and maps below.
hd_shp <- hd_shp[hd_shp$Neighborhood_HD %in% 1, ]

Transform the shape files from Web Mercator to WGS 84 longitude/latitude (EPSG:4326):

# reproject to EPSG:4326 (WGS 84 longitude/latitude); the shapefiles are read
# in as WGS 84 / Pseudo-Mercator, and the tract layers are already in 4326
zone_shp <- sf::st_transform(zone_shp, 4326)
hd_shp <- sf::st_transform(hd_shp, 4326)
ward_shp <- sf::st_transform(ward_shp, 4326)

Let’s overlay HDs onto DC’s zoning map.

# Every zone code present in the layer, and the subset that allows housing
# (codes beginning with "R" or "MU").
zones_list <- sort(unique(zone_shp$ZR16))
housing_zones <- grep("^R|^MU", zones_list, value = TRUE)
# keep only the housing zones
zone_shp <- zone_shp[zone_shp$ZR16 %in% housing_zones, ]

# Collapse the detailed zone codes into a few readable categories; anything
# that matches none of the prefixes is labelled "Other".
zone_shp$ZR16_simple <- dplyr::case_when(
  grepl("^RA-", zone_shp$ZR16) ~ "Apartment zones",
  grepl("^R-", zone_shp$ZR16) ~ "Residential zones",
  grepl("^RF-", zone_shp$ZR16) ~ "Residential flat zones",
  grepl("^MU-", zone_shp$ZR16) ~ "Mixed use zones",
  TRUE ~ "Other"
)

# Map the zones, coloured by simplified category and labelled with the full
# zone code on hover.
factpal <- colorFactor(palette = "Set1", domain = zone_shp$ZR16_simple)

leaflet(zone_shp) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    label = ~ZR16,
    fillColor = ~factpal(ZR16_simple),
    fillOpacity = 0.7,
    color = "white",
    weight = 1,
    opacity = 1,
    highlightOptions = highlightOptions(weight = 3,
                                        color = "white",
                                        bringToFront = TRUE)
  ) %>%
  addLegend(position = "bottomright",
            pal = factpal,
            values = ~ZR16_simple,
            opacity = 0.7,
            title = NULL)

Calculate the total amount of residential and MU land in DC:

# Repair a layer's geometries when at least one of them is not valid.
# shp: an sf object
# Returns shp unchanged when every geometry is valid; otherwise prints a note
# and returns sf::st_make_valid(shp).
fix_geo_if_broken <- function(shp) {
  # st_is_valid() can report NA for a geometry it cannot assess. isTRUE(all())
  # counts that as "needs fixing" instead of handing `if` an NA condition,
  # which would stop with an error.
  if (isTRUE(all(sf::st_is_valid(shp)))) {
    return(shp)
  }
  print("Fixing geometry...")
  sf::st_make_valid(shp)
}

# Repair invalid geometries in every layer before measuring or intersecting it.
zone_shp <- fix_geo_if_broken(zone_shp)
## [1] "Fixing geometry..."
hd_shp <- fix_geo_if_broken(hd_shp)
ward_shp <- fix_geo_if_broken(ward_shp)
t60_shp <- fix_geo_if_broken(t60_shp)
t70_shp <- fix_geo_if_broken(t70_shp)
t80_shp <- fix_geo_if_broken(t80_shp)
t90_shp <- fix_geo_if_broken(t90_shp)
t00_shp <- fix_geo_if_broken(t00_shp)
t10_shp <- fix_geo_if_broken(t10_shp)
t20_shp <- fix_geo_if_broken(t20_shp)


# Area of each zoning polygon in square metres, converted to acres
# (1 square metre = 0.000247105 acres); as.vector() drops the units class.
zone_shp$area_meters <- sf::st_area(zone_shp)
zone_shp$area_acres <- as.vector(zone_shp$area_meters * 0.000247105)
# Total acreage of the zones kept in zone_shp (the residential and mixed-use
# zones); used below as the denominator for the share covered by HDs.
total_zone_acres <- sum(zone_shp$area_acres, na.rm=T)

Total and % of land area covered by HDs over time:

# Area of each HD in square metres and in acres
# (1 square metre = 0.000247105 acres).
hd_shp$area_meters <- sf::st_area(hd_shp)
hd_shp$area_acres <- as.vector(hd_shp$area_meters * 0.000247105)

# Acres inside HDs designated strictly before each year. which() skips
# districts whose designation date is missing, so a single NA date cannot
# turn a whole year's total into NA.
years <- c(1960, 1970, 1980, 1990, 2000, 2010, 2025)
land_areas <- vapply(
  years,
  function(yr) sum(hd_shp$area_acres[which(hd_shp$desig_date < yr)]),
  numeric(1)
)

# NOTE(review): the percentage divides the full HD acreage by the acreage of
# the residential/mixed-use zones only, so HD land that lies in other zones
# is counted in the numerator -- confirm this is the intended measure.
p <- data.frame(years, land_areas, round(100 * land_areas / total_zone_acres, 0))
names(p) <- c("Year", 
              "Area covered by by Historic Districts, in Acres",
              "Percent of 2016 residential zone covered by Neighborhood HD")

# Bar chart of the acreage; vjust is a fixed offset, so it is set outside
# aes() (as in the second plot) rather than mapped as an aesthetic.
plot1 <-
  ggplot(p, 
         aes(x = Year, 
             y = `Area covered by by Historic Districts, in Acres`)) + 
  geom_bar(stat = "identity", fill = "#0f9535") +
  geom_text(aes(label = round(`Area covered by by Historic Districts, in Acres`, 0)),
            vjust = -1.7) +
  ylab("Acres") +
  theme_minimal() +
  ggtitle('Acres covered by "neighborhood historic districts"\nhas steadily increased over time')

# Line chart of the share of residential/mixed-use zone acreage.
plot2 <-
  ggplot(p, 
         aes(x = Year, 
             y = `Percent of 2016 residential zone covered by Neighborhood HD`)) + 
  geom_line(color = "#0f9535", size = .75) +
  geom_point(color = "#0f9535") +
  geom_text(aes(label = paste0(`Percent of 2016 residential zone covered by Neighborhood HD`, "%")),
            vjust = -1.7) +
  theme_minimal() +
  ylab("Percent") +
  ggtitle('And the % of residential area covered\nby HDs has more than doubled since 1980')

# patchwork places the two plots side by side
plot1 + plot2

Let’s quickly compare which HDs were designated before vs after 1980:

# 1 for districts designated before 1980, 0 otherwise (including districts
# whose designation date is missing).
hd_shp$flag_1980 <- as.numeric(!is.na(hd_shp$desig_date) & hd_shp$desig_date < 1980)

# Two toggleable layers: blue for the pre-1980 districts, pink for the rest.
leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    data = dplyr::filter(hd_shp, flag_1980 == 1),
    group = "Designated before 1980",
    label = ~LABEL,
    fillColor = "skyblue",
    fillOpacity = 0.7,
    color = "white",
    weight = 1,
    opacity = 1,
    highlightOptions = highlightOptions(weight = 3,
                                        color = "white",
                                        bringToFront = FALSE)
  ) %>%
  addPolygons(
    data = dplyr::filter(hd_shp, flag_1980 == 0),
    group = "Designated after 1980",
    label = ~LABEL,
    fillColor = "hotpink",
    fillOpacity = 0.7,
    color = "white",
    weight = 1,
    opacity = 1,
    highlightOptions = highlightOptions(weight = 3,
                                        color = "white",
                                        bringToFront = FALSE)
  ) %>%
  addLayersControl(
    overlayGroups = c("Designated before 1980", "Designated after 1980"),
    options = layersControlOptions(collapsed = FALSE)
  )

We’ll remove some very small HDs for the next part (like HDs that are a single circle):

# Drop some very small HDs (for example ones that are a single circle).
# The districts are removed by name, using the hard-coded list below.
# NOTE(review): the original note here said that anything smaller than
# 10 acres was dropped -- confirm the list still matches that rule
# (hd_shp$area_acres is available to check it).
hd_shp <-
  hd_shp %>%
  filter(!(LABEL %in% c("Emerald St HD",
                       "Grant Rd HD",
                       "Mount Vernon Triangle HD",
                       "Grant Circle HD",
                       "Union Market"
                       )))

# turn off spherical geometry (s2) for the sf operations that follow
# (the intersections, buffers and areas in the functions below)
sf_use_s2(FALSE)

Now let’s look and see how HDs changed over time, in terms of % black residents and % white residents, compared to nearby neighborhoods (tracts) that were not in HDs.

This will have a few steps:

  1. Take a given HD.
  2. Get the intersection of the HD with census tracts. If at least X% of the census tract area is in the HD, we’ll count it as part of the HD.
  3. Create a buffer around the HD of distance Y. Run another intersection with the census tracts and that buffer. Count any tracts that touch the buffer but are not in an HD as comparison tracts.
  4. Note the % of white and black residents in each HD and in that HD’s comparison tracts.
  5. See how those respective percentages change over time.

Function to find which tracts/blocks are in which HDs:

get_geos_in_hd <- function(shp, min_pct, year) {
  # This function gets the tracts (or blocks) that are in each HD in a given year
  # shp: the tract or block shapefile (an sf object with geo_id, n_tot,
  #   n_black, n_white and n_other columns)
  # min_pct: the minimum share of the tract or block that must be in the HD to
  #   count as part of the HD (a decimal # between 0 and 1)
  # year: the year (an integer like 1980)
  # Reads the HD polygons from the global `hd_shp`.
  # Returns a list with
  #   geos_in_hd: one row per tract/HD pair that passes min_pct (sf object)
  #   summary:    one row per HD with raw and area-prorated population totals,
  #               desig_year, desig_yet (1 if designated before `year`) and year

  # Re-measure every tract here so the denominator comes from the same CRS and
  # the same st_area() method as the intersection pieces below. The
  # geo_area_meters column set by the loader was measured before the layer was
  # reprojected and need not be comparable with areas measured now.
  shp$geo_area_meters <- sf::st_area(shp)

  pieces <- sf::st_intersection(x = shp, y = hd_shp)
  pieces$i_area <- sf::st_area(pieces)
  pieces$pct_of_geo_area <- as.vector(pieces$i_area / pieces$geo_area_meters)

  keep_cols <- c("year", "geo_id", "LABEL",
                 "n_tot", "n_black", "n_white", "n_other",
                 "desig_date", "pct_of_geo_area")
  # which() drops pairs whose share is NA instead of keeping them as NA rows
  geos_in_hd <- pieces[which(pieces$pct_of_geo_area > min_pct), keep_cols]

  # Summarise on the attribute table only: summarising the sf object would
  # also union the geometries, which are discarded from the summary anyway.
  geos_in_hd_summary <-
    sf::st_drop_geometry(geos_in_hd) %>%
    mutate(n_tot_prorated = n_tot * pct_of_geo_area,
           n_black_prorated = n_black * pct_of_geo_area,
           n_white_prorated = n_white * pct_of_geo_area,
           n_other_prorated = n_other * pct_of_geo_area) %>%
    group_by(LABEL) %>%
    summarise(n_tot = sum(n_tot, na.rm = TRUE),
              n_black = sum(n_black, na.rm = TRUE),
              n_white = sum(n_white, na.rm = TRUE),
              n_other = sum(n_other, na.rm = TRUE),
              n_tot_prorated = sum(n_tot_prorated, na.rm = TRUE),
              n_black_prorated = sum(n_black_prorated, na.rm = TRUE),
              n_white_prorated = sum(n_white_prorated, na.rm = TRUE),
              n_other_prorated = sum(n_other_prorated, na.rm = TRUE),
              desig_year = max(desig_date, na.rm = TRUE)) %>%
    mutate(desig_yet = ifelse(desig_year < year, 1, 0),
           year = year)

  list("geos_in_hd" = geos_in_hd, "summary" = geos_in_hd_summary)
}

Function to find which tracts/blocks are nearby but not inside the HDs:

get_neighbor_geos <- function(hd_shp, geo_shp, buffer_dist, geos_in_hd, remove_geo_thresh, year) {
  # Finds the tracts/blocks that lie near each HD without being counted as in one.
  # hd_shp: the HD polygons (sf object with LABEL and desig_date columns)
  # geo_shp: the tract or block layer for one census year (sf object)
  # buffer_dist: buffer width around each HD, in the units of the layer's CRS
  # geos_in_hd: the list returned by get_geos_in_hd() for the same layer
  # remove_geo_thresh: a tract/block holding more than this share of any HD's
  #   area is excluded from the neighbours (a decimal # between 0 and 1)
  # year: the census year (an integer like 1980)
  # Returns a list with the buffers, the neighbouring tracts/blocks (one row
  # per tract/HD pair) and a per-HD summary of their population counts.

  # buffer each HD and find every tract/block piece that falls inside a buffer
  b <- sf::st_buffer(hd_shp, dist = buffer_dist)
  near_pieces <- sf::st_intersection(x = geo_shp, y = b)

  # drop tracts/blocks already classified as being inside an HD
  in_hd_ids <- geos_in_hd$geos_in_hd$geo_id
  near_pieces <- near_pieces[!(near_pieces$geo_id %in% in_hd_ids), ]

  # find tracts/blocks that contain more than remove_geo_thresh of an HD's area
  hd_shp$area_meters <- sf::st_area(hd_shp)
  hd_pieces <- sf::st_intersection(x = geo_shp, y = hd_shp)
  hd_pieces$intersect_area <- sf::st_area(hd_pieces)
  hd_pieces$pct_area <- as.vector(hd_pieces$intersect_area / hd_pieces$area_meters)
  geos_to_remove <- hd_pieces$geo_id[hd_pieces$pct_area > remove_geo_thresh]

  # whole tracts/blocks that survive both exclusions ...
  neighbor_ids <- near_pieces$geo_id[!(near_pieces$geo_id %in% geos_to_remove)]
  neighboring_geos <- geo_shp[geo_shp$geo_id %in% neighbor_ids, ]
  # ... tagged with each HD whose buffer they touch
  hd_lookup <- sf::st_drop_geometry(near_pieces[, c("geo_id", "LABEL", "desig_date")])
  neighboring_geos <- dplyr::left_join(neighboring_geos, hd_lookup, by = "geo_id")

  # population totals of the neighbours of each HD
  neighbor_geos_summary <-
    neighboring_geos %>%
    sf::st_drop_geometry() %>%
    group_by(LABEL) %>%
    summarise(n_tot = sum(n_tot, na.rm = TRUE),
              n_black = sum(n_black, na.rm = TRUE),
              n_white = sum(n_white, na.rm = TRUE),
              n_other = sum(n_other, na.rm = TRUE),
              desig_year = max(desig_date, na.rm = TRUE)) %>%
    mutate(desig_yet = ifelse(desig_year < year, 1, 0),
           year = year)

  list("buffers" = b,
       "neighbor_geos" = neighboring_geos,
       "neighbor_geos_summary" = neighbor_geos_summary)
}

Function to map those tracts/blocks and see if everything looks good:

plot_geos <- function(geo_shp, nearby_geos_shp, geos_in_hds) {
  # Draws the classification on an interactive map so it can be checked by eye.
  # geo_shp: the tract or block layer (sf object)
  # nearby_geos_shp: the list returned by get_neighbor_geos()
  # geos_in_hds: geo_id values of the tracts/blocks classified as inside an HD
  # The HD polygons are read from the global `hd_shp`.
  # Returns a leaflet map with four toggleable layers.

  # hover behaviour shared by the three classified layers
  hover <- highlightOptions(weight = 3, color = "white", bringToFront = FALSE)
  in_hd_shp <- geo_shp[geo_shp$geo_id %in% geos_in_hds, ]

  rv <- leaflet()
  rv <- addProviderTiles(rv, providers$CartoDB.Positron)
  rv <- addPolygons(rv, group = 'buffers', data = nearby_geos_shp[["buffers"]])
  rv <- addPolygons(rv,
                    group = "geos labeled as near HDs",
                    data = nearby_geos_shp[["neighbor_geos"]],
                    label = ~paste0("Geo: ", geo_id, "; HD neighbor: ", LABEL),
                    fillColor = "skyblue",
                    fillOpacity = 0.7,
                    color = "white",
                    weight = 1,
                    opacity = 1,
                    highlightOptions = hover)
  rv <- addPolygons(rv,
                    group = "geos labeled as in HDs",
                    data = in_hd_shp,
                    label = ~geo_id,
                    fillColor = "limegreen",
                    fillOpacity = 0.7,
                    color = "white",
                    weight = 1,
                    opacity = 1,
                    highlightOptions = hover)
  rv <- addPolygons(rv,
                    group = "HDs",
                    data = hd_shp,
                    label = ~LABEL,
                    fillColor = "hotpink",
                    fillOpacity = 0.7,
                    color = "white",
                    weight = 1,
                    opacity = 1,
                    highlightOptions = hover)
  rv <- addLayersControl(
    rv,
    overlayGroups = c("geos labeled as near HDs", "geos labeled as in HDs", "HDs", "buffers"),
    options = layersControlOptions(collapsed = FALSE)
  )

  rv
}

Function to run regressions:

run_regressions <- function(hd_comp_df, near_comp_df, weights = FALSE) {
  # Fits difference-in-differences regressions comparing the tracts/blocks in
  # each HD with their neighbours, once for the % of black residents and once
  # for the % of white residents, and prints both model summaries.
  # hd_comp_df: stacked per-HD, per-year summaries of the tracts/blocks inside
  #   HDs (LABEL, year, desig_yet, desig_year, n_* and n_*_prorated columns)
  # near_comp_df: the matching summaries for the neighbouring tracts/blocks
  #   (LABEL, year, desig_yet, desig_year and n_* columns)
  # weights: if TRUE, weight each observation by the HD population (n_tot_hd)
  # Returns the long-format data used in the models (one row per HD, year,
  #   group and treatment/control arm), with year_index (years since the first
  #   designated decade) and percent_std (percent standardised within HD,
  #   group and arm) added; the result is still grouped by those three columns.

  hd_comp_df <-
    hd_comp_df %>%
    select(-ends_with("_prorated"), -desig_year) %>%
    group_by(LABEL, desig_yet, year) %>%
    summarise(across(everything(), sum), .groups = "drop_last") %>%
    mutate(pct_black = n_black / n_tot,
           pct_white = n_white / n_tot) %>%
    rename_with(~ paste0(., "_hd"))

  # Percentages are derived from the summed counts, exactly as for the HD
  # table above; computing them before the sum would add percentages together
  # whenever a group holds more than one row.
  near_comp_df <-
    near_comp_df %>%
    select(-desig_year) %>%
    group_by(LABEL, desig_yet, year) %>%
    summarise(across(everything(), sum), .groups = "drop_last") %>%
    mutate(pct_black = n_black / n_tot,
           pct_white = n_white / n_tot) %>%
    rename_with(~ paste0(., "_near"))

  comp_df <- dplyr::full_join(x = hd_comp_df, 
                              y = near_comp_df, 
                              by = c("LABEL_hd" = "LABEL_near", 
                                     "desig_yet_hd" = "desig_yet_near",
                                     "year_hd" = "year_near")) 

  # keep the wide table so the HD population can be re-attached as a weight
  comp_df_copy <- comp_df
  comp_df <- comp_df %>% select(starts_with(c("LABEL", "desig_yet", "year", "pct_")))

  # one row per HD, year, group (black/white) and arm (hd/near)
  comp_df <-
    comp_df %>%
    tidyr::pivot_longer(
      cols = starts_with("pct_"),
      names_to = c("group", "treatment_control"),
      names_pattern = "pct_([a-zA-Z]+)_([a-zA-Z]+)",
      values_to = "percent")

  comp_df <- dplyr::left_join(x = comp_df, 
                              y = comp_df_copy %>% select("LABEL_hd", "desig_yet_hd", "year_hd", "n_tot_hd"), 
                              by = c("LABEL_hd", 
                                     "desig_yet_hd",
                                     "year_hd"))

  # remove HDs that are always HDs or always not HDs during the timeframe
  comp_df <-
    comp_df %>%
    group_by(LABEL_hd) %>%
    mutate(mean_status = mean(desig_yet_hd, na.rm = TRUE)) %>%
    filter(mean_status > 0) %>%
    filter(mean_status < 1)

  comp_df$treated <- 0
  comp_df$treated[comp_df$treatment_control == "hd"] <- 1

  comp_df$LABEL_hd <- as.factor(comp_df$LABEL_hd)
  comp_df$year_hd  <- as.factor(comp_df$year_hd)

  # The four lm() calls are written out in full on purpose: summary() echoes
  # the call, so the printed output names the formula, data subset and weights.
  if (weights) {
    # diff in diff analysis for change in the % of black residents
    diff_in_diff_b <- lm(percent ~               # outcome: % of white or black residents
                         treated +               # "treatment": whether tract is in a HD or not
                         desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                         treated:LABEL_hd +      # controlling for average % within each set of tracts/blocks (either in or near each HD)
                         treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                         LABEL_hd  +             # fixed effect for HD area
                         year_hd,                # fixed effect for year
                       data = comp_df[comp_df$group=="black",],
                       weights=n_tot_hd)

    # diff in diff analysis for change in the % of white residents
    diff_in_diff_w <- lm(percent ~               # outcome: % of white or black residents
                         treated +               # "treatment": whether tract is in a HD or not
                         desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                         treated:LABEL_hd +      # controlling for average % within each set of tracts/blocks (either in or near each HD)
                         treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                         LABEL_hd  +             # fixed effect for HD area
                         year_hd,                # fixed effect for year
                       data = comp_df[comp_df$group=="white",],
                       weights=n_tot_hd)
    text_bit <- ", weighted by HD population"
  } else {
    # diff in diff analysis for change in the % of black residents
    diff_in_diff_b <- lm(percent ~               # outcome: % of white or black residents
                         treated +               # "treatment": whether tract is in a HD or not
                         desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                         treated:LABEL_hd +      # controlling for average % within each set of tracts/blocks (either in or near each HD)
                         treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                         LABEL_hd  +             # fixed effect for HD area
                         year_hd,                # fixed effect for year
                       data = comp_df[comp_df$group=="black",])

    # diff in diff analysis for change in the % of white residents
    diff_in_diff_w <- lm(percent ~               # outcome: % of white or black residents
                         treated +               # "treatment": whether tract is in a HD or not
                         desig_yet_hd +          # pre/post indicator: whether HD was designated yet
                         treated:LABEL_hd +      # controlling for average % within each set of tracts/blocks (either in or near each HD)
                         treated:desig_yet_hd +  # D-in-D estimator: effect of treatment after implemented
                         LABEL_hd  +             # fixed effect for HD area
                         year_hd,                # fixed effect for year
                       data = comp_df[comp_df$group=="white",])
    text_bit <- ", NOT weighted by HD population"
  }

  print("__________________________________")
  print(paste0("D-in-D regression for the % of black residents", text_bit))
  print(summary(diff_in_diff_b))

  print("__________________________________")
  print(paste0("D-in-D regression for the % of white residents", text_bit))
  print(summary(diff_in_diff_w))

  # first census year in which each HD counts as designated (0 for the
  # not-yet-designated rows, then the maximum is spread over the whole HD),
  # and each row's distance in years from that point
  comp_df <-
    comp_df %>%
    group_by(LABEL_hd, desig_yet_hd) %>%
    mutate(first_decade_desig = min(as.numeric(as.character(year_hd)))) %>%
    mutate(first_decade_desig = ifelse(desig_yet_hd == 0, 0, first_decade_desig)) %>%
    ungroup() %>%
    group_by(LABEL_hd) %>%
    mutate(first_decade_desig = max(first_decade_desig)) %>%
    mutate(year_index = as.numeric(as.character(year_hd)) - first_decade_desig)

  # standardise the percentage within each HD, group and arm
  comp_df <-
    comp_df %>%
    ungroup() %>%
    group_by(LABEL_hd, group, treatment_control) %>%
    mutate(percent_std = (percent - mean(percent, na.rm = TRUE)) / sd(percent, na.rm = TRUE))

  return(comp_df)
}

Call all the functions we created above:

# Minimum share of a tract that must lie inside an HD for the tract to count
# as part of it; values between .2 and .6 seem to give reasonable results.
mp <- 0.25

# classify the tracts of every census year against the HDs
hd_geos60 <- get_geos_in_hd(shp = t60_shp, min_pct = mp, year = 1960)
hd_geos70 <- get_geos_in_hd(shp = t70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(shp = t80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(shp = t90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(shp = t00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(shp = t10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(shp = t20_shp, min_pct = mp, year = 2020)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1474887 78.8    2423672 129.5  2423672 129.5
## Vcells 4400880 33.6   10146329  77.5 10146318  77.5
# the buffer distance is in decimal degrees
buff_dist <- .005
# a tract holding more than this share of an HD's area is not used as a neighbor
threshold <- .1

# Each year's neighbors are screened against that same year's in-HD tracts
# (the 1960 call used to be handed the 1970 classification, hd_geos70).
nearby_tracts60 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t60_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos60, remove_geo_thresh = threshold, year = 1960)
nearby_tracts70 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t70_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos70, remove_geo_thresh = threshold, year = 1970)
nearby_tracts80 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t80_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos80, remove_geo_thresh = threshold, year = 1980)
nearby_tracts90 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t90_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos90, remove_geo_thresh = threshold, year = 1990)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1474763 78.8    2423672 129.5  2423672 129.5
## Vcells 4476452 34.2   10146329  77.5 10146318  77.5
# same neighbor search for the remaining census years
nearby_tracts00 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t00_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos00, remove_geo_thresh = threshold, year = 2000)
nearby_tracts10 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t10_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos10, remove_geo_thresh = threshold, year = 2010)
nearby_tracts20 <-
  get_neighbor_geos(hd_shp = hd_shp, geo_shp = t20_shp, buffer_dist = buff_dist,
                    geos_in_hd = hd_geos20, remove_geo_thresh = threshold, year = 2020)
gc()
##           used (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1475897 78.9    2423672 129.5  2423672 129.5
## Vcells 4534221 34.6   10146329  77.5 10146318  77.5

Plot our classifications in 1960 and 2020, as a gut check:

# Map the first and last census years: arguments are the tract layer, the
# neighbor results for that year and the IDs of the tracts classified as in HDs.
plot_geos(t60_shp, nearby_tracts60, hd_geos60$geos_in_hd$geo_id)
plot_geos(t20_shp, nearby_tracts20, hd_geos20$geos_in_hd$geo_id)

Now compare the demographics of the HD tracts and their neighbors in each year:

# widen the console so the regression tables print without wrapping
options(width = 200)

# stack the per-HD summaries of every census year: tracts inside HDs ...
hd_comp_df <- dplyr::bind_rows(
  lapply(
    list(hd_geos60, hd_geos70, hd_geos80, hd_geos90,
         hd_geos00, hd_geos10, hd_geos20),
    function(res) res[["summary"]]
  )
)
# ... and the tracts neighboring them
near_comp_df <- dplyr::bind_rows(
  lapply(
    list(nearby_tracts60, nearby_tracts70, nearby_tracts80, nearby_tracts90,
         nearby_tracts00, nearby_tracts10, nearby_tracts20),
    function(res) res[["neighbor_geos_summary"]]
  )
)

comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = TRUE)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "black", ], weights = n_tot_hd)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -31.5855  -5.4348   0.6459   5.3858  24.0898 
## 
## Coefficients: (2 not defined because of singularities)
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            1.05882    0.10459  10.124  < 2e-16 ***
## treated                                0.18493    0.14612   1.266 0.207520    
## desig_yet_hd                          -0.09368    0.03898  -2.403 0.017439 *  
## LABEL_hdBloomingdale HD               -0.27812    0.11411  -2.437 0.015921 *  
## LABEL_hdCapitol Hill HD               -0.20936    0.10375  -2.018 0.045320 *  
## LABEL_hdCleveland Park HD             -0.87054    0.11516  -7.559 3.24e-12 ***
## LABEL_hdDupont Circle HD              -0.90000    0.10723  -8.393 2.69e-14 ***
## LABEL_hdGeorgetown HD                 -0.90548    0.10632  -8.517 1.30e-14 ***
## LABEL_hdGreater 14th St HD            -0.67034    0.10781  -6.218 4.42e-09 ***
## LABEL_hdGreater U St HD               -0.24179    0.10999  -2.198 0.029398 *  
## LABEL_hdKalorama Triangle HD          -0.70868    0.11529  -6.147 6.34e-09 ***
## LABEL_hdKingman Park HD               -0.13249    0.12767  -1.038 0.300999    
## LABEL_hdMeridian Hill                 -0.50244    0.21553  -2.331 0.021023 *  
## LABEL_hdMt. Pleasant HD               -0.47802    0.10736  -4.452 1.61e-05 ***
## LABEL_hdMt. Vernon Square HD          -0.19691    0.12642  -1.558 0.121371    
## LABEL_hdPennsylvania Ave NHS          -0.39424    0.22474  -1.754 0.081361 .  
## LABEL_hdShaw HD                       -0.22294    0.11646  -1.914 0.057421 .  
## LABEL_hdSheridan-Kalorama HD          -0.76585    0.12422  -6.165 5.77e-09 ***
## LABEL_hdStrivers' Section HD          -0.71288    0.12202  -5.842 2.90e-08 ***
## LABEL_hdWashington Heights HD         -0.73235    0.13871  -5.280 4.27e-07 ***
## year_hd1970                            0.05106    0.02883   1.771 0.078446 .  
## year_hd1980                            0.04111    0.03429   1.199 0.232389    
## year_hd1990                            0.00799    0.03968   0.201 0.840667    
## year_hd2000                           -0.02443    0.04345  -0.562 0.574728    
## year_hd2010                           -0.13427    0.04419  -3.039 0.002787 ** 
## year_hd2020                           -0.21265    0.04519  -4.706 5.54e-06 ***
## treated:LABEL_hdBloomingdale HD       -0.08315    0.16044  -0.518 0.604999    
## treated:LABEL_hdCapitol Hill HD       -0.49120    0.14646  -3.354 0.001001 ** 
## treated:LABEL_hdCleveland Park HD     -0.15507    0.16272  -0.953 0.342064    
## treated:LABEL_hdDupont Circle HD       0.02676    0.15059   0.178 0.859185    
## treated:LABEL_hdGeorgetown HD         -0.06689    0.14994  -0.446 0.656129    
## treated:LABEL_hdGreater 14th St HD          NA         NA      NA       NA    
## treated:LABEL_hdGreater U St HD       -0.25375    0.15524  -1.635 0.104153    
## treated:LABEL_hdKalorama Triangle HD  -0.23395    0.16278  -1.437 0.152655    
## treated:LABEL_hdKingman Park HD       -0.14589    0.17900  -0.815 0.416311    
## treated:LABEL_hdMeridian Hill         -0.12356    0.30352  -0.407 0.684497    
## treated:LABEL_hdMt. Pleasant HD       -0.27673    0.15168  -1.824 0.070003 .  
## treated:LABEL_hdMt. Vernon Square HD  -0.14614    0.17832  -0.820 0.413726    
## treated:LABEL_hdPennsylvania Ave NHS  -0.64839    0.31751  -2.042 0.042825 *  
## treated:LABEL_hdShaw HD               -0.20708    0.16434  -1.260 0.209506    
## treated:LABEL_hdSheridan-Kalorama HD  -0.27986    0.17552  -1.594 0.112858    
## treated:LABEL_hdStrivers' Section HD        NA         NA      NA       NA    
## treated:LABEL_hdWashington Heights HD -0.18861    0.19505  -0.967 0.335036    
## treated:desig_yet_hd                  -0.12419    0.03614  -3.436 0.000756 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.25 on 156 degrees of freedom
##   (214 observations deleted due to missingness)
## Multiple R-squared:  0.9054, Adjusted R-squared:  0.8805 
## F-statistic: 36.41 on 41 and 156 DF,  p-value: < 2.2e-16
## 
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "white", ], weights = n_tot_hd)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.766  -4.428  -0.827   4.921  36.125 
## 
## Coefficients: (2 not defined because of singularities)
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            0.03215    0.10775   0.298 0.765818    
## treated                               -0.17327    0.15054  -1.151 0.251490    
## desig_yet_hd                           0.07502    0.04016   1.868 0.063677 .  
## LABEL_hdBloomingdale HD                0.20097    0.11756   1.709 0.089358 .  
## LABEL_hdCapitol Hill HD                0.14925    0.10689   1.396 0.164619    
## LABEL_hdCleveland Park HD              0.76410    0.11865   6.440 1.40e-09 ***
## LABEL_hdDupont Circle HD               0.77472    0.11048   7.013 6.68e-11 ***
## LABEL_hdGeorgetown HD                  0.81324    0.10954   7.424 6.90e-12 ***
## LABEL_hdGreater 14th St HD             0.51748    0.11107   4.659 6.77e-06 ***
## LABEL_hdGreater U St HD                0.13183    0.11331   1.163 0.246450    
## LABEL_hdKalorama Triangle HD           0.56053    0.11878   4.719 5.23e-06 ***
## LABEL_hdKingman Park HD                0.07428    0.13154   0.565 0.573090    
## LABEL_hdMeridian Hill                  0.29055    0.22205   1.308 0.192631    
## LABEL_hdMt. Pleasant HD                0.30936    0.11061   2.797 0.005811 ** 
## LABEL_hdMt. Vernon Square HD           0.12181    0.13025   0.935 0.351113    
## LABEL_hdPennsylvania Ave NHS           0.28845    0.23154   1.246 0.214720    
## LABEL_hdShaw HD                        0.12252    0.11999   1.021 0.308796    
## LABEL_hdSheridan-Kalorama HD           0.62106    0.12798   4.853 2.93e-06 ***
## LABEL_hdStrivers' Section HD           0.59066    0.12571   4.699 5.71e-06 ***
## LABEL_hdWashington Heights HD          0.56607    0.14291   3.961 0.000113 ***
## year_hd1970                           -0.05568    0.02970  -1.875 0.062690 .  
## year_hd1980                           -0.06698    0.03533  -1.896 0.059862 .  
## year_hd1990                           -0.05089    0.04088  -1.245 0.215035    
## year_hd2000                           -0.11838    0.04476  -2.645 0.009010 ** 
## year_hd2010                            0.04674    0.04552   1.027 0.306140    
## year_hd2020                            0.03605    0.04656   0.774 0.439956    
## treated:LABEL_hdBloomingdale HD        0.07711    0.16529   0.466 0.641520    
## treated:LABEL_hdCapitol Hill HD        0.47601    0.15090   3.155 0.001929 ** 
## treated:LABEL_hdCleveland Park HD      0.15973    0.16764   0.953 0.342175    
## treated:LABEL_hdDupont Circle HD      -0.03265    0.15515  -0.210 0.833587    
## treated:LABEL_hdGeorgetown HD          0.06957    0.15448   0.450 0.653072    
## treated:LABEL_hdGreater 14th St HD          NA         NA      NA       NA    
## treated:LABEL_hdGreater U St HD        0.24561    0.15993   1.536 0.126633    
## treated:LABEL_hdKalorama Triangle HD   0.26579    0.16770   1.585 0.115018    
## treated:LABEL_hdKingman Park HD        0.13516    0.18442   0.733 0.464705    
## treated:LABEL_hdMeridian Hill          0.10298    0.31270   0.329 0.742351    
## treated:LABEL_hdMt. Pleasant HD        0.22660    0.15627   1.450 0.149065    
## treated:LABEL_hdMt. Vernon Square HD   0.09530    0.18371   0.519 0.604661    
## treated:LABEL_hdPennsylvania Ave NHS   0.39686    0.32712   1.213 0.226891    
## treated:LABEL_hdShaw HD                0.15703    0.16931   0.928 0.355098    
## treated:LABEL_hdSheridan-Kalorama HD   0.30493    0.18083   1.686 0.093746 .  
## treated:LABEL_hdStrivers' Section HD        NA         NA      NA       NA    
## treated:LABEL_hdWashington Heights HD  0.21865    0.20095   1.088 0.278239    
## treated:desig_yet_hd                   0.11663    0.03723   3.132 0.002071 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.56 on 156 degrees of freedom
##   (214 observations deleted due to missingness)
## Multiple R-squared:  0.8763, Adjusted R-squared:  0.8438 
## F-statistic: 26.95 on 41 and 156 DF,  p-value: < 2.2e-16
# Re-run the difference-in-differences regressions, this time without
# population weights. FALSE is spelled out because the shorthand `F` is an
# ordinary variable that can be reassigned.
comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = FALSE)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, NOT weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "black", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46686 -0.07581  0.00862  0.08239  0.30110 
## 
## Coefficients: (15 not defined because of singularities)
##                                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                    0.87444    0.05459  16.019  < 2e-16 ***
## treated                                        0.30431    0.08895   3.421 0.000732 ***
## desig_yet_hd                                  -0.03094    0.03147  -0.983 0.326477    
## LABEL_hdBlagden Alley/Naylor Court HD         -0.06412    0.07110  -0.902 0.367989    
## LABEL_hdBloomingdale HD                       -0.10618    0.07278  -1.459 0.145887    
## LABEL_hdCapitol Hill HD                       -0.08029    0.07052  -1.138 0.256082    
## LABEL_hdCleveland Park HD                     -0.73163    0.07067 -10.353  < 2e-16 ***
## LABEL_hdDowntown HD                           -0.13060    0.07180  -1.819 0.070166 .  
## LABEL_hdDupont Circle HD                      -0.77121    0.07348 -10.495  < 2e-16 ***
## LABEL_hdFinancial HD                          -0.50223    0.14248  -3.525 0.000507 ***
## LABEL_hdFoggy Bottom HD                       -0.77357    0.07366 -10.502  < 2e-16 ***
## LABEL_hdFoxhall HD                            -0.81664    0.07180 -11.373  < 2e-16 ***
## LABEL_hdGeorgetown HD                         -0.79710    0.07067 -11.280  < 2e-16 ***
## LABEL_hdGreater 14th St HD                    -0.65218    0.08437  -7.730 2.91e-13 ***
## LABEL_hdGreater U St HD                       -0.09305    0.07110  -1.309 0.191869    
## LABEL_hdGWU / Old West End HD                 -0.78353    0.07511 -10.431  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                  -0.56433    0.07067  -7.986 5.73e-14 ***
## LABEL_hdKingman Park HD                        0.03506    0.07278   0.482 0.630420    
## LABEL_hdLafayette Square HD                   -0.80221    0.07745 -10.358  < 2e-16 ***
## LABEL_hdLeDroit Park HD                       -0.04098    0.07052  -0.581 0.561718    
## LABEL_hdLogan Circle HD                       -0.09731    0.07350  -1.324 0.186753    
## LABEL_hdMassachusetts Ave HD                  -0.77121    0.07348 -10.495  < 2e-16 ***
## LABEL_hdMeridian Hill                         -0.28747    0.07278  -3.950 0.000103 ***
## LABEL_hdMt. Pleasant HD                       -0.34134    0.07067  -4.830 2.43e-06 ***
## LABEL_hdMt. Vernon Square HD                  -0.03957    0.07110  -0.557 0.578348    
## LABEL_hdPennsylvania Ave NHS                  -0.21627    0.07110  -3.042 0.002610 ** 
## LABEL_hdShaw HD                               -0.05059    0.07110  -0.712 0.477434    
## LABEL_hdSheridan-Kalorama HD                  -0.62627    0.07067  -8.862  < 2e-16 ***
## LABEL_hdSixteenth St HD                       -0.13960    0.07052  -1.980 0.048897 *  
## LABEL_hdStrivers' Section HD                  -0.71645    0.08578  -8.353 5.29e-15 ***
## LABEL_hdTakoma Park HD                        -0.16871    0.07067  -2.387 0.017738 *  
## LABEL_hdWashington Heights HD                 -0.57317    0.07180  -7.983 5.84e-14 ***
## LABEL_hdWoodley Park HD                       -0.69250    0.07110  -9.740  < 2e-16 ***
## year_hd1970                                    0.07627    0.02933   2.600 0.009898 ** 
## year_hd1980                                    0.07385    0.03076   2.401 0.017121 *  
## year_hd1990                                    0.03373    0.03336   1.011 0.313010    
## year_hd2000                                   -0.01008    0.03691  -0.273 0.785054    
## year_hd2010                                   -0.13344    0.03975  -3.357 0.000916 ***
## year_hd2020                                   -0.21566    0.04148  -5.199 4.27e-07 ***
## treated:LABEL_hdBlagden Alley/Naylor Court HD       NA         NA      NA       NA    
## treated:LABEL_hdBloomingdale HD               -0.21952    0.11218  -1.957 0.051524 .  
## treated:LABEL_hdCapitol Hill HD               -0.62626    0.10910  -5.740 2.83e-08 ***
## treated:LABEL_hdCleveland Park HD             -0.27242    0.10953  -2.487 0.013549 *  
## treated:LABEL_hdDowntown HD                         NA         NA      NA       NA    
## treated:LABEL_hdDupont Circle HD              -0.09631    0.11099  -0.868 0.386437    
## treated:LABEL_hdFinancial HD                        NA         NA      NA       NA    
## treated:LABEL_hdFoggy Bottom HD                     NA         NA      NA       NA    
## treated:LABEL_hdFoxhall HD                          NA         NA      NA       NA    
## treated:LABEL_hdGeorgetown HD                 -0.18449    0.10891  -1.694 0.091562 .  
## treated:LABEL_hdGreater 14th St HD                  NA         NA      NA       NA    
## treated:LABEL_hdGreater U St HD               -0.38645    0.11018  -3.507 0.000540 ***
## treated:LABEL_hdGWU / Old West End HD               NA         NA      NA       NA    
## treated:LABEL_hdKalorama Triangle HD          -0.35051    0.10953  -3.200 0.001557 ** 
## treated:LABEL_hdKingman Park HD               -0.26041    0.11382  -2.288 0.023005 *  
## treated:LABEL_hdLafayette Square HD                 NA         NA      NA       NA    
## treated:LABEL_hdLeDroit Park HD                     NA         NA      NA       NA    
## treated:LABEL_hdLogan Circle HD                     NA         NA      NA       NA    
## treated:LABEL_hdMassachusetts Ave HD                NA         NA      NA       NA    
## treated:LABEL_hdMeridian Hill                 -0.33011    0.16493  -2.002 0.046458 *  
## treated:LABEL_hdMt. Pleasant HD               -0.39401    0.10953  -3.597 0.000390 ***
## treated:LABEL_hdMt. Vernon Square HD          -0.28161    0.11175  -2.520 0.012380 *  
## treated:LABEL_hdPennsylvania Ave NHS          -0.77159    0.13821  -5.583 6.36e-08 ***
## treated:LABEL_hdShaw HD                       -0.38557    0.11018  -3.499 0.000556 ***
## treated:LABEL_hdSheridan-Kalorama HD          -0.39484    0.10953  -3.605 0.000379 ***
## treated:LABEL_hdSixteenth St HD                     NA         NA      NA       NA    
## treated:LABEL_hdStrivers' Section HD                NA         NA      NA       NA    
## treated:LABEL_hdTakoma Park HD                      NA         NA      NA       NA    
## treated:LABEL_hdWashington Heights HD         -0.32746    0.11818  -2.771 0.006028 ** 
## treated:LABEL_hdWoodley Park HD                     NA         NA      NA       NA    
## treated:desig_yet_hd                          -0.12733    0.03560  -3.577 0.000420 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1319 on 241 degrees of freedom
##   (116 observations deleted due to missingness)
## Multiple R-squared:  0.8857, Adjusted R-squared:  0.8601 
## F-statistic: 34.58 on 54 and 241 DF,  p-value: < 2.2e-16
## 
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, NOT weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "white", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29861 -0.07870 -0.01097  0.06902  0.45859 
## 
## Coefficients: (15 not defined because of singularities)
##                                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                    0.188118   0.051975   3.619 0.000360 ***
## treated                                       -0.255676   0.084692  -3.019 0.002810 ** 
## desig_yet_hd                                   0.021013   0.029963   0.701 0.483792    
## LABEL_hdBlagden Alley/Naylor Court HD          0.009999   0.067694   0.148 0.882696    
## LABEL_hdBloomingdale HD                        0.071260   0.069299   1.028 0.304841    
## LABEL_hdCapitol Hill HD                        0.060986   0.067151   0.908 0.364683    
## LABEL_hdCleveland Park HD                      0.659528   0.067287   9.802  < 2e-16 ***
## LABEL_hdDowntown HD                            0.074103   0.068367   1.084 0.279498    
## LABEL_hdDupont Circle HD                       0.676934   0.069966   9.675  < 2e-16 ***
## LABEL_hdFinancial HD                           0.502429   0.135660   3.704 0.000264 ***
## LABEL_hdFoggy Bottom HD                        0.674389   0.070133   9.616  < 2e-16 ***
## LABEL_hdFoxhall HD                             0.756775   0.068367  11.069  < 2e-16 ***
## LABEL_hdGeorgetown HD                          0.737352   0.067287  10.958  < 2e-16 ***
## LABEL_hdGreater 14th St HD                     0.498293   0.080336   6.203 2.40e-09 ***
## LABEL_hdGreater U St HD                        0.018203   0.067694   0.269 0.788233    
## LABEL_hdGWU / Old West End HD                  0.681116   0.071521   9.523  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                   0.445296   0.067287   6.618 2.34e-10 ***
## LABEL_hdKingman Park HD                       -0.045497   0.069299  -0.657 0.512109    
## LABEL_hdLafayette Square HD                    0.702389   0.073744   9.525  < 2e-16 ***
## LABEL_hdLeDroit Park HD                       -0.015370   0.067151  -0.229 0.819145    
## LABEL_hdLogan Circle HD                        0.047051   0.069981   0.672 0.502016    
## LABEL_hdMassachusetts Ave HD                   0.676934   0.069966   9.675  < 2e-16 ***
## LABEL_hdMeridian Hill                          0.133251   0.069299   1.923 0.055679 .  
## LABEL_hdMt. Pleasant HD                        0.208982   0.067287   3.106 0.002125 ** 
## LABEL_hdMt. Vernon Square HD                  -0.001797   0.067694  -0.027 0.978843    
## LABEL_hdPennsylvania Ave NHS                   0.159063   0.067694   2.350 0.019595 *  
## LABEL_hdShaw HD                               -0.006711   0.067694  -0.099 0.921112    
## LABEL_hdSheridan-Kalorama HD                   0.514577   0.067287   7.648 4.88e-13 ***
## LABEL_hdSixteenth St HD                        0.046388   0.067151   0.691 0.490351    
## LABEL_hdStrivers' Section HD                   0.593076   0.081672   7.262 5.25e-12 ***
## LABEL_hdTakoma Park HD                         0.105951   0.067287   1.575 0.116655    
## LABEL_hdWashington Heights HD                  0.451300   0.068367   6.601 2.58e-10 ***
## LABEL_hdWoodley Park HD                        0.602485   0.067694   8.900  < 2e-16 ***
## year_hd1970                                   -0.083700   0.027931  -2.997 0.003014 ** 
## year_hd1980                                   -0.111101   0.029289  -3.793 0.000188 ***
## year_hd1990                                   -0.086560   0.031764  -2.725 0.006899 ** 
## year_hd2000                                   -0.145944   0.035142  -4.153 4.56e-05 ***
## year_hd2010                                    0.021490   0.037848   0.568 0.570697    
## year_hd2020                                    0.022337   0.039494   0.566 0.572212    
## treated:LABEL_hdBlagden Alley/Naylor Court HD        NA         NA      NA       NA    
## treated:LABEL_hdBloomingdale HD                0.177333   0.106818   1.660 0.098184 .  
## treated:LABEL_hdCapitol Hill HD                0.573138   0.103880   5.517 8.87e-08 ***
## treated:LABEL_hdCleveland Park HD              0.240306   0.104286   2.304 0.022057 *  
## treated:LABEL_hdDowntown HD                          NA         NA      NA       NA    
## treated:LABEL_hdDupont Circle HD               0.053520   0.105683   0.506 0.613027    
## treated:LABEL_hdFinancial HD                         NA         NA      NA       NA    
## treated:LABEL_hdFoggy Bottom HD                      NA         NA      NA       NA    
## treated:LABEL_hdFoxhall HD                           NA         NA      NA       NA    
## treated:LABEL_hdGeorgetown HD                  0.149548   0.103700   1.442 0.150566    
## treated:LABEL_hdGreater 14th St HD                   NA         NA      NA       NA    
## treated:LABEL_hdGreater U St HD                0.337505   0.104913   3.217 0.001473 ** 
## treated:LABEL_hdGWU / Old West End HD                NA         NA      NA       NA    
## treated:LABEL_hdKalorama Triangle HD           0.357435   0.104286   3.427 0.000716 ***
## treated:LABEL_hdKingman Park HD                0.220484   0.108372   2.035 0.042995 *  
## treated:LABEL_hdLafayette Square HD                  NA         NA      NA       NA    
## treated:LABEL_hdLeDroit Park HD                      NA         NA      NA       NA    
## treated:LABEL_hdLogan Circle HD                      NA         NA      NA       NA    
## treated:LABEL_hdMassachusetts Ave HD                 NA         NA      NA       NA    
## treated:LABEL_hdMeridian Hill                  0.250553   0.157039   1.595 0.111916    
## treated:LABEL_hdMt. Pleasant HD                0.306977   0.104286   2.944 0.003561 ** 
## treated:LABEL_hdMt. Vernon Square HD           0.196664   0.106402   1.848 0.065782 .  
## treated:LABEL_hdPennsylvania Ave NHS           0.458397   0.131596   3.483 0.000588 ***
## treated:LABEL_hdShaw HD                        0.282975   0.104913   2.697 0.007486 ** 
## treated:LABEL_hdSheridan-Kalorama HD           0.384730   0.104286   3.689 0.000278 ***
## treated:LABEL_hdSixteenth St HD                      NA         NA      NA       NA    
## treated:LABEL_hdStrivers' Section HD                 NA         NA      NA       NA    
## treated:LABEL_hdTakoma Park HD                       NA         NA      NA       NA    
## treated:LABEL_hdWashington Heights HD          0.314509   0.112528   2.795 0.005609 ** 
## treated:LABEL_hdWoodley Park HD                      NA         NA      NA       NA    
## treated:desig_yet_hd                           0.120513   0.033896   3.555 0.000454 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1256 on 241 degrees of freedom
##   (116 observations deleted due to missingness)
## Multiple R-squared:  0.8758, Adjusted R-squared:  0.8479 
## F-statistic: 31.46 on 54 and 241 DF,  p-value: < 2.2e-16
# Interactive chart of `percent` over `year_index` for the rows of comp_df
# whose group is "black": one colour per LABEL_hd, with the line type set by
# treatment_control. Rows are sorted first so each line is drawn in
# year_index order within a district.
comp_df[comp_df$group == "black", ] %>%
  arrange(LABEL_hd, year_index) %>%
  plot_ly(
    x = ~year_index,
    y = ~percent, # ~percent_std
    color = ~LABEL_hd, # grouping variable for colour
    linetype = ~as.factor(treatment_control),
    type = "scatter",
    mode = "lines+markers"
  )

Now we’re going to repeat all of that, but using Census blocks instead of tracts. We will have to rely on slightly different versions of the Census data to do this, downloaded from NHGIS rather than Open Data DC. We’re also only going back to 1970, since that’s the data I could find relatively easily.

Load and clean block data:

# One-time preprocessing, kept commented out for reference: read the national
# 1970 block shapefile, keep the rows with STATE70 == "11", and write that
# subset to the DC-only shapefile loaded below.
# b70_shp <- sf::st_read("block_shapes/US_block_1970/US_block_1970.shp")
# b70_shp <- b70_shp[b70_shp$STATE70=="11",]
# sf::st_write(b70_shp, "block_shapes/DC_block_1970/DC_block_1970.shp")

# Load the 1970 DC block boundaries.
b70_shp <- sf::st_read("block_shapes/DC_block_1970/DC_block_1970.shp")
## Reading layer `DC_block_1970' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1970\DC_block_1970.shp' using driver `ESRI Shapefile'
## Simple feature collection with 4665 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the 1980 DC block boundaries.
b80_shp <- sf::st_read("block_shapes/DC_block_1980/DC_block_1980.shp")
## Reading layer `DC_block_1980' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\DC_block_1980\DC_block_1980.shp' using driver `ESRI Shapefile'
## Simple feature collection with 4627 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the 1990 DC block boundaries.
b90_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_1990/DC_block_1990.shp")
## Reading layer `DC_block_1990' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_1990\DC_block_1990.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5140 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the 2000 DC block boundaries.
b00_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2000_110_block_2000/DC_block_2000.shp")
## Reading layer `DC_block_2000' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2000_110_block_2000\DC_block_2000.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5626 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610795 ymin: 308338.5 xmax: 1629412 ymax: 329313.2
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the 2010 DC block boundaries.
b10_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2010_110_block_2010/DC_block_2010.shp")
## Reading layer `DC_block_2010' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2010_110_block_2010\DC_block_2010.shp' using driver `ESRI Shapefile'
## Simple feature collection with 6426 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329361
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the 2020 DC block boundaries.
b20_shp <- sf::st_read("block_shapes/nhgis0092_shapefile_tl2020_110_block_2020/DC_block_2020.shp")
## Reading layer `DC_block_2020' from data source `C:\Users\edwar\Documents\GitHub\hd_analysis\block_shapes\nhgis0092_shapefile_tl2020_110_block_2020\DC_block_2020.shp' using driver `ESRI Shapefile'
## Simple feature collection with 5935 features and 18 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 1610830 ymin: 308504.6 xmax: 1629412 ymax: 329396
## Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# Load the block-level census tables, one CSV per decade from 1970 to 2020.
# These are joined to the block boundaries by clean_block_data() below.
b70_df <- readr::read_csv("block_data/nhgis0093_ds96_1970_block.csv")
b80_df <- readr::read_csv("block_data/nhgis_ds104_1980_block_11.csv")
b90_df <- readr::read_csv("block_data/nhgis0092_ds120_1990_block.csv")
b00_df <- readr::read_csv("block_data/nhgis0092_ds147_2000_block.csv")
b10_df <- readr::read_csv("block_data/nhgis0092_ds172_2010_block.csv")
b20_df <- readr::read_csv("block_data/nhgis0092_ds258_2020_block.csv")

# Attach block-level race counts to a block boundary layer.
#
# Keeps only the block identifier (renamed to `geo_id`) and the geometry of
# `shp`, derives total / Black / white / other counts from the columns of `df`
# whose names start with `var_prefix`, joins those counts onto the shapes,
# reprojects to EPSG:4326 and records each block's area and the census year.
#
# Arguments:
#   shp         sf object holding the block boundaries.
#   df          data frame of block-level counts.
#   shp_b_id    name (string) of the block identifier column in `shp`.
#   df_b_id     name (string) of the block identifier column in `df`.
#   var_prefix  prefix shared by the count columns of `df`; every column with
#               this prefix is summed into `n_tot`.
#   df_n_black  name (string) of the column holding the Black count.
#   df_n_white  name (string) of the column holding the white count.
#   drop_var    optional name of a column to remove from `df` before summing
#               (e.g. a pre-computed total that shares `var_prefix` and would
#               otherwise be double counted). "" (the default), NULL or NA
#               means nothing is dropped.
#   year        value stored in the `year` column of the result.
#
# Returns: `shp` reduced to `geo_id` plus geometry, with the added columns
#   `n_tot`, `n_black`, `n_white`, `n_other`, `geo_area_meters` (the result of
#   sf::st_area() on the reprojected layer) and `year`. Blocks with no match
#   in `df` keep NA counts because the join is a left join.
clean_block_data <- function(shp, df, shp_b_id, df_b_id, var_prefix, df_n_black, df_n_white, drop_var="", year) {
  shp <- shp %>%
    select(all_of(shp_b_id)) %>%
    rename("geo_id" = all_of(shp_b_id))

  # Treat NULL, NA and "" alike as "no column to drop".
  if (!is.null(drop_var) && !is.na(drop_var) && nzchar(drop_var)) {
    df <- df %>% select(-all_of(drop_var))
  }

  df <- df %>%
    select(all_of(df_b_id), starts_with(var_prefix)) %>%
    # Vectorised row total: same value as a per-row sum() over the prefixed
    # columns, without rowwise() grouping (slow on thousands of blocks, and it
    # would leave the data frame row-wise grouped afterwards).
    mutate(n_tot = rowSums(across(starts_with(var_prefix)))) %>%
    rename(
      "geo_id" = all_of(df_b_id),
      "n_black" = all_of(df_n_black),
      "n_white" = all_of(df_n_white)
    ) %>%
    # The remaining prefixed columns are only needed for the total.
    select(-starts_with(var_prefix)) %>%
    mutate(n_other = n_tot - (n_black + n_white))

  shp <- dplyr::left_join(shp, df, by = "geo_id")
  shp <- sf::st_transform(shp, 4326)
  shp$geo_area_meters <- sf::st_area(shp)
  shp$year <- year

  shp
}

# The 1970 block table needs special handling before it can be cleaned; see
# https://forum.ipums.org/t/race-ethnicity-data-at-a-block-level-from-1970/6178
# Black and "other" counts are each the sum of two CM6 columns; the white
# count is what remains of CM5001 + CM5002 once those two are subtracted.
b70_df <- b70_df %>%
  mutate(
    c_black = CM6001 + CM6002,
    c_other = CM6003 + CM6004,
    c_white = CM5001 + CM5002 - c_black - c_other
  )

# Build one cleaned block layer per census year. Every table joins on GISJOIN;
# the 2010 and 2020 tables additionally drop their *001 column before the
# prefixed columns are summed.
b70_shp <- clean_block_data(
  shp = b70_shp, df = b70_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "c_", df_n_black = "c_black", df_n_white = "c_white",
  year = 1970
)
b80_shp <- clean_block_data(
  shp = b80_shp, df = b80_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "C9D0", df_n_black = "C9D002", df_n_white = "C9D001",
  year = 1980
)
b90_shp <- clean_block_data(
  shp = b90_shp, df = b90_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "EUY0", df_n_black = "EUY002", df_n_white = "EUY001",
  year = 1990
)
b00_shp <- clean_block_data(
  shp = b00_shp, df = b00_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "FYE0", df_n_black = "FYE002", df_n_white = "FYE001",
  year = 2000
)
b10_shp <- clean_block_data(
  shp = b10_shp, df = b10_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "H7X", df_n_black = "H7X003", df_n_white = "H7X002",
  drop_var = "H7X001", year = 2010
)
b20_shp <- clean_block_data(
  shp = b20_shp, df = b20_df,
  shp_b_id = "GISJOIN", df_b_id = "GISJOIN",
  var_prefix = "U7J", df_n_black = "U7J003", df_n_white = "U7J002",
  drop_var = "U7J001", year = 2020
)

# The raw tables have been merged into the block layers, so remove them from
# the workspace.
rm(list = c("b70_df", "b80_df", "b90_df", "b00_df", "b10_df", "b20_df"))

# Quick visual check of the 1970 block geometries.
plot(sf::st_geometry(b70_shp["geo_id"]))

Get the blocks in the HDs:

# Run garbage collection (and print memory usage) before the block/HD overlay.
gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1891989 101.1    3866905 206.6  3866905 206.6
## Vcells 6996244  53.4   19206178 146.6 14604356 111.5
# Value passed to get_geos_in_hd() as `min_pct` for every census year.
# NOTE(review): presumably the minimum share of a block that must fall inside
# a historic district for the block to count as "in" it — confirm against
# get_geos_in_hd().
mp <- 0.25 # TODO: run analysis varying this factor
hd_geos70 <- get_geos_in_hd(b70_shp, min_pct = mp, year = 1970)
hd_geos80 <- get_geos_in_hd(b80_shp, min_pct = mp, year = 1980)
hd_geos90 <- get_geos_in_hd(b90_shp, min_pct = mp, year = 1990)
hd_geos00 <- get_geos_in_hd(b00_shp, min_pct = mp, year = 2000)
hd_geos10 <- get_geos_in_hd(b10_shp, min_pct = mp, year = 2010)
hd_geos20 <- get_geos_in_hd(b20_shp, min_pct = mp, year = 2020)

Get the blocks neighboring the HDs:

# Run garbage collection (and print memory usage) before the neighbour search.
gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 1933390 103.3    3866905 206.6  3866905 206.6
## Vcells 7249170  55.4   19206178 146.6 14604356 111.5
# Settings shared by every get_neighbor_geos() call below.
# NOTE(review): buff_dist is presumably in degrees, since the block layers are
# reprojected to EPSG:4326 — confirm inside get_neighbor_geos().
buff_dist <- 0.005
threshold <- 0.1

# `threshold` and the census year are unnamed, so they fill the first two
# parameters of get_neighbor_geos() not matched by name.
nearby_blocks70 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b70_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos70, threshold, 1970
)
nearby_blocks80 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b80_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos80, threshold, 1980
)
nearby_blocks90 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b90_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos90, threshold, 1990
)
nearby_blocks00 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b00_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos00, threshold, 2000
)
nearby_blocks10 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b10_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos10, threshold, 2010
)
nearby_blocks20 <- get_neighbor_geos(
  hd_shp = hd_shp, geo_shp = b20_shp, buffer_dist = buff_dist,
  geos_in_hd = hd_geos20, threshold, 2020
)

Again let’s peek at a map to make sure everything looks ok:

# Map the 1970 and 2020 results as a sanity check. Each call receives the full
# block layer, the neighbouring-block result, and the geo_id values of the
# blocks classified as inside a historic district.
plot_geos(b70_shp, nearby_blocks70, hd_geos70$geos_in_hd$geo_id)
plot_geos(b20_shp, nearby_blocks20, hd_geos20$geos_in_hd$geo_id)

Now compare the demographics of the HD blocks and their neighbors in each year:

# Widen the console output (presumably so the long coefficient tables print
# without wrapping).
options(width = 200)

# Stack the per-year results into one data frame per group: element 2 of each
# get_geos_in_hd() result and element 3 of each get_neighbor_geos() result.
# NOTE(review): the positional [[2]] / [[3]] lookups depend on the element
# order of those return values — confirm against the two functions.
hd_comp_df <- dplyr::bind_rows(
  hd_geos70[[2]], hd_geos80[[2]], hd_geos90[[2]],
  hd_geos00[[2]], hd_geos10[[2]], hd_geos20[[2]]
)

near_comp_df <- dplyr::bind_rows(
  nearby_blocks70[[3]], nearby_blocks80[[3]], nearby_blocks90[[3]],
  nearby_blocks00[[3]], nearby_blocks10[[3]], nearby_blocks20[[3]]
)

# Weighted difference-in-differences run. TRUE is spelled out because the
# shorthand `T` is an ordinary variable that can be reassigned.
comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = TRUE)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "black", ], weights = n_tot_hd)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.171  -4.228  -0.335   4.309  21.542 
## 
## Coefficients:
##                                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                    1.094499   0.050192  21.806  < 2e-16 ***
## treated                                        0.022764   0.070182   0.324 0.745899    
## desig_yet_hd                                  -0.035263   0.028263  -1.248 0.213174    
## LABEL_hdBlagden Alley/Naylor Court HD         -0.338856   0.140220  -2.417 0.016290 *  
## LABEL_hdBloomingdale HD                       -0.114821   0.065567  -1.751 0.080980 .  
## LABEL_hdCapitol Hill HD                       -0.236743   0.051493  -4.598 6.41e-06 ***
## LABEL_hdCleveland Park HD                     -0.852852   0.065177 -13.085  < 2e-16 ***
## LABEL_hdDowntown HD                           -0.384588   0.115730  -3.323 0.001006 ** 
## LABEL_hdDupont Circle HD                      -0.782856   0.055290 -14.159  < 2e-16 ***
## LABEL_hdFinancial HD                          -0.515653   0.266980  -1.931 0.054414 .  
## LABEL_hdFoggy Bottom HD                       -0.867560   0.103328  -8.396 2.14e-15 ***
## LABEL_hdFoxhall HD                            -0.886399   0.129671  -6.836 4.90e-11 ***
## LABEL_hdGreater 14th St HD                    -0.416413   0.060490  -6.884 3.67e-11 ***
## LABEL_hdGreater U St HD                       -0.241097   0.059891  -4.026 7.28e-05 ***
## LABEL_hdGWU / Old West End HD                 -0.823820   0.081959 -10.052  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                  -0.623772   0.070745  -8.817  < 2e-16 ***
## LABEL_hdKingman Park HD                       -0.031854   0.076276  -0.418 0.676539    
## LABEL_hdLafayette Square HD                   -0.856368   0.687126  -1.246 0.213669    
## LABEL_hdLeDroit Park HD                       -0.101589   0.094925  -1.070 0.285428    
## LABEL_hdLogan Circle HD                       -0.377970   0.099998  -3.780 0.000191 ***
## LABEL_hdMassachusetts Ave HD                  -0.864398   0.076554 -11.291  < 2e-16 ***
## LABEL_hdMeridian Hill                         -0.392792   0.069081  -5.686 3.20e-08 ***
## LABEL_hdMt. Pleasant HD                       -0.417057   0.057361  -7.271 3.41e-12 ***
## LABEL_hdMt. Vernon Square HD                  -0.202336   0.091680  -2.207 0.028107 *  
## LABEL_hdPennsylvania Ave NHS                  -0.424723   0.116158  -3.656 0.000304 ***
## LABEL_hdShaw HD                               -0.325184   0.065444  -4.969 1.16e-06 ***
## LABEL_hdSheridan-Kalorama HD                  -0.858872   0.077214 -11.123  < 2e-16 ***
## LABEL_hdSixteenth St HD                       -0.382411   0.063261  -6.045 4.64e-09 ***
## LABEL_hdStrivers' Section HD                  -0.379721   0.073844  -5.142 5.04e-07 ***
## LABEL_hdTakoma Park HD                        -0.213681   0.095558  -2.236 0.026112 *  
## LABEL_hdWashington Heights HD                 -0.583130   0.073393  -7.945 4.44e-14 ***
## LABEL_hdWoodley Park HD                       -0.847260   0.078205 -10.834  < 2e-16 ***
## year_hd1980                                   -0.032958   0.023986  -1.374 0.170490    
## year_hd1990                                   -0.092701   0.026979  -3.436 0.000678 ***
## year_hd2000                                   -0.152123   0.029927  -5.083 6.70e-07 ***
## year_hd2010                                   -0.282790   0.030199  -9.364  < 2e-16 ***
## year_hd2020                                   -0.350601   0.032266 -10.866  < 2e-16 ***
## treated:LABEL_hdBlagden Alley/Naylor Court HD -0.084637   0.198077  -0.427 0.669485    
## treated:LABEL_hdBloomingdale HD               -0.056265   0.090885  -0.619 0.536353    
## treated:LABEL_hdCapitol Hill HD               -0.316951   0.072797  -4.354 1.86e-05 ***
## treated:LABEL_hdCleveland Park HD              0.030033   0.091938   0.327 0.744158    
## treated:LABEL_hdDowntown HD                   -0.306190   0.162816  -1.881 0.061041 .  
## treated:LABEL_hdDupont Circle HD               0.041643   0.078166   0.533 0.594620    
## treated:LABEL_hdFinancial HD                   0.006616   0.377289   0.018 0.986021    
## treated:LABEL_hdFoggy Bottom HD               -0.025687   0.146019  -0.176 0.860484    
## treated:LABEL_hdFoxhall HD                    -0.031188   0.182714  -0.171 0.864584    
## treated:LABEL_hdGreater 14th St HD            -0.117139   0.084921  -1.379 0.168848    
## treated:LABEL_hdGreater U St HD               -0.101161   0.084094  -1.203 0.229987    
## treated:LABEL_hdGWU / Old West End HD         -0.007980   0.114031  -0.070 0.944259    
## treated:LABEL_hdKalorama Triangle HD          -0.202589   0.099838  -2.029 0.043362 *  
## treated:LABEL_hdKingman Park HD               -0.001868   0.106286  -0.018 0.985988    
## treated:LABEL_hdLafayette Square HD           -0.062043   0.971705  -0.064 0.949135    
## treated:LABEL_hdLeDroit Park HD                0.009881   0.134229   0.074 0.941369    
## treated:LABEL_hdLogan Circle HD               -0.076563   0.141391  -0.541 0.588588    
## treated:LABEL_hdMassachusetts Ave HD           0.050279   0.108247   0.464 0.642654    
## treated:LABEL_hdMeridian Hill                 -0.125543   0.095732  -1.311 0.190768    
## treated:LABEL_hdMt. Pleasant HD               -0.146905   0.080867  -1.817 0.070318 .  
## treated:LABEL_hdMt. Vernon Square HD          -0.055779   0.129242  -0.432 0.666365    
## treated:LABEL_hdPennsylvania Ave NHS          -0.170463   0.163933  -1.040 0.299293    
## treated:LABEL_hdShaw HD                       -0.095298   0.091978  -1.036 0.301034    
## treated:LABEL_hdSheridan-Kalorama HD           0.001568   0.109016   0.014 0.988536    
## treated:LABEL_hdSixteenth St HD               -0.256214   0.089443  -2.865 0.004484 ** 
## treated:LABEL_hdStrivers' Section HD          -0.168225   0.104244  -1.614 0.107679    
## treated:LABEL_hdTakoma Park HD                -0.132561   0.134944  -0.982 0.326762    
## treated:LABEL_hdWashington Heights HD         -0.084122   0.102776  -0.818 0.413752    
## treated:LABEL_hdWoodley Park HD               -0.034322   0.110095  -0.312 0.755457    
## treated:desig_yet_hd                          -0.066337   0.028795  -2.304 0.021949 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.845 on 287 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8878, Adjusted R-squared:  0.862 
## F-statistic:  34.4 on 66 and 287 DF,  p-value: < 2.2e-16
## 
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "white", ], weights = n_tot_hd)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.269  -3.279   0.230   3.618  36.360 
## 
## Coefficients:
##                                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                   -0.031600   0.046944  -0.673 0.501396    
## treated                                       -0.039740   0.065640  -0.605 0.545376    
## desig_yet_hd                                   0.014814   0.026434   0.560 0.575631    
## LABEL_hdBlagden Alley/Naylor Court HD          0.236797   0.131145   1.806 0.072027 .  
## LABEL_hdBloomingdale HD                        0.075693   0.061324   1.234 0.218094    
## LABEL_hdCapitol Hill HD                        0.211757   0.048160   4.397 1.55e-05 ***
## LABEL_hdCleveland Park HD                      0.778580   0.060959  12.772  < 2e-16 ***
## LABEL_hdDowntown HD                            0.263954   0.108240   2.439 0.015351 *  
## LABEL_hdDupont Circle HD                       0.679745   0.051712  13.145  < 2e-16 ***
## LABEL_hdFinancial HD                           0.408283   0.249701   1.635 0.103128    
## LABEL_hdFoggy Bottom HD                        0.772065   0.096641   7.989 3.33e-14 ***
## LABEL_hdFoxhall HD                             0.811585   0.121279   6.692 1.15e-10 ***
## LABEL_hdGreater 14th St HD                     0.320510   0.056575   5.665 3.57e-08 ***
## LABEL_hdGreater U St HD                        0.142131   0.056015   2.537 0.011698 *  
## LABEL_hdGWU / Old West End HD                  0.699275   0.076655   9.122  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                   0.503375   0.066166   7.608 4.02e-13 ***
## LABEL_hdKingman Park HD                        0.012550   0.071339   0.176 0.860480    
## LABEL_hdLafayette Square HD                    0.751400   0.642657   1.169 0.243290    
## LABEL_hdLeDroit Park HD                        0.069107   0.088782   0.778 0.436981    
## LABEL_hdLogan Circle HD                        0.291051   0.093526   3.112 0.002046 ** 
## LABEL_hdMassachusetts Ave HD                   0.779070   0.071599  10.881  < 2e-16 ***
## LABEL_hdMeridian Hill                          0.231990   0.064610   3.591 0.000388 ***
## LABEL_hdMt. Pleasant HD                        0.254619   0.053648   4.746 3.28e-06 ***
## LABEL_hdMt. Vernon Square HD                   0.129731   0.085747   1.513 0.131392    
## LABEL_hdPennsylvania Ave NHS                   0.306921   0.108641   2.825 0.005058 ** 
## LABEL_hdShaw HD                                0.221154   0.061209   3.613 0.000357 ***
## LABEL_hdSheridan-Kalorama HD                   0.771586   0.072217  10.684  < 2e-16 ***
## LABEL_hdSixteenth St HD                        0.252728   0.059167   4.271 2.64e-05 ***
## LABEL_hdStrivers' Section HD                   0.222277   0.069065   3.218 0.001437 ** 
## LABEL_hdTakoma Park HD                         0.136913   0.089374   1.532 0.126645    
## LABEL_hdWashington Heights HD                  0.446988   0.068643   6.512 3.31e-10 ***
## LABEL_hdWoodley Park HD                        0.768093   0.073144  10.501  < 2e-16 ***
## year_hd1980                                    0.007038   0.022433   0.314 0.753947    
## year_hd1990                                    0.041604   0.025233   1.649 0.100285    
## year_hd2000                                    0.048793   0.027990   1.743 0.082360 .  
## year_hd2010                                    0.184377   0.028245   6.528 3.02e-10 ***
## year_hd2020                                    0.172556   0.030178   5.718 2.70e-08 ***
## treated:LABEL_hdBlagden Alley/Naylor Court HD  0.021128   0.185258   0.114 0.909281    
## treated:LABEL_hdBloomingdale HD                0.070782   0.085003   0.833 0.405705    
## treated:LABEL_hdCapitol Hill HD                0.320223   0.068085   4.703 3.98e-06 ***
## treated:LABEL_hdCleveland Park HD             -0.011632   0.085988  -0.135 0.892486    
## treated:LABEL_hdDowntown HD                    0.078352   0.152279   0.515 0.607278    
## treated:LABEL_hdDupont Circle HD              -0.024199   0.073107  -0.331 0.740884    
## treated:LABEL_hdFinancial HD                   0.018211   0.352872   0.052 0.958877    
## treated:LABEL_hdFoggy Bottom HD                0.031484   0.136569   0.231 0.817843    
## treated:LABEL_hdFoxhall HD                     0.051843   0.170889   0.303 0.761828    
## treated:LABEL_hdGreater 14th St HD             0.132962   0.079425   1.674 0.095209 .  
## treated:LABEL_hdGreater U St HD                0.128867   0.078652   1.638 0.102422    
## treated:LABEL_hdGWU / Old West End HD          0.039226   0.106651   0.368 0.713296    
## treated:LABEL_hdKalorama Triangle HD           0.260759   0.093376   2.793 0.005581 ** 
## treated:LABEL_hdKingman Park HD                0.018578   0.099408   0.187 0.851879    
## treated:LABEL_hdLafayette Square HD            0.074087   0.908818   0.082 0.935085    
## treated:LABEL_hdLeDroit Park HD               -0.003239   0.125542  -0.026 0.979437    
## treated:LABEL_hdLogan Circle HD                0.101303   0.132241   0.766 0.444278    
## treated:LABEL_hdMassachusetts Ave HD          -0.058986   0.101242  -0.583 0.560605    
## treated:LABEL_hdMeridian Hill                  0.157897   0.089536   1.763 0.078880 .  
## treated:LABEL_hdMt. Pleasant HD                0.140649   0.075634   1.860 0.063965 .  
## treated:LABEL_hdMt. Vernon Square HD           0.064458   0.120877   0.533 0.594269    
## treated:LABEL_hdPennsylvania Ave NHS           0.213345   0.153323   1.391 0.165160    
## treated:LABEL_hdShaw HD                        0.045602   0.086026   0.530 0.596458    
## treated:LABEL_hdSheridan-Kalorama HD           0.016831   0.101961   0.165 0.869000    
## treated:LABEL_hdSixteenth St HD                0.275096   0.083654   3.288 0.001133 ** 
## treated:LABEL_hdStrivers' Section HD           0.237396   0.097498   2.435 0.015506 *  
## treated:LABEL_hdTakoma Park HD                 0.150508   0.126211   1.193 0.234046    
## treated:LABEL_hdWashington Heights HD          0.135040   0.096125   1.405 0.161151    
## treated:LABEL_hdWoodley Park HD                0.034886   0.102970   0.339 0.735009    
## treated:desig_yet_hd                           0.079315   0.026931   2.945 0.003493 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.338 on 287 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8712, Adjusted R-squared:  0.8416 
## F-statistic: 29.42 on 66 and 287 DF,  p-value: < 2.2e-16
# Re-run the difference-in-differences regressions for the share of Black and
# white residents, this time WITHOUT weighting by historic-district population.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved constant.
comp_df <- run_regressions(hd_comp_df, near_comp_df, weights = FALSE)
## [1] "__________________________________"
## [1] "D-in-D regression for the % of black residents, NOT weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "black", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32305 -0.08752 -0.01128  0.08908  0.38220 
## 
## Coefficients:
##                                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                    1.0781078  0.0604448  17.836  < 2e-16 ***
## treated                                        0.0191811  0.0840252   0.228 0.819591    
## desig_yet_hd                                  -0.0408531  0.0313702  -1.302 0.193850    
## LABEL_hdBlagden Alley/Naylor Court HD         -0.3540140  0.0801221  -4.418 1.41e-05 ***
## LABEL_hdBloomingdale HD                       -0.1489255  0.0821436  -1.813 0.070867 .  
## LABEL_hdCapitol Hill HD                       -0.2686395  0.0794368  -3.382 0.000819 ***
## LABEL_hdCleveland Park HD                     -0.9025268  0.0796087 -11.337  < 2e-16 ***
## LABEL_hdDowntown HD                           -0.4219638  0.0809705  -5.211 3.57e-07 ***
## LABEL_hdDupont Circle HD                      -0.8191686  0.0794368 -10.312  < 2e-16 ***
## LABEL_hdFinancial HD                          -0.5367684  0.0796087  -6.743 8.41e-11 ***
## LABEL_hdFoggy Bottom HD                       -0.8853311  0.0796087 -11.121  < 2e-16 ***
## LABEL_hdFoxhall HD                            -0.9287913  0.0809705 -11.471  < 2e-16 ***
## LABEL_hdGreater 14th St HD                    -0.4420809  0.0801221  -5.518 7.62e-08 ***
## LABEL_hdGreater U St HD                       -0.2740548  0.0801221  -3.420 0.000715 ***
## LABEL_hdGWU / Old West End HD                 -0.9172140  0.0821436 -11.166  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                  -0.6618395  0.0796087  -8.314 3.64e-15 ***
## LABEL_hdKingman Park HD                       -0.0635144  0.0821436  -0.773 0.440027    
## LABEL_hdLafayette Square HD                   -0.7791059  0.0794368  -9.808  < 2e-16 ***
## LABEL_hdLeDroit Park HD                       -0.1410615  0.0794368  -1.776 0.076819 .  
## LABEL_hdLogan Circle HD                       -0.4037128  0.0794368  -5.082 6.70e-07 ***
## LABEL_hdMassachusetts Ave HD                  -0.8926077  0.0794368 -11.237  < 2e-16 ***
## LABEL_hdMeridian Hill                         -0.4363197  0.0821436  -5.312 2.17e-07 ***
## LABEL_hdMt. Pleasant HD                       -0.4521275  0.0796087  -5.679 3.28e-08 ***
## LABEL_hdMt. Vernon Square HD                  -0.2195924  0.0801221  -2.741 0.006511 ** 
## LABEL_hdPennsylvania Ave NHS                  -0.4470505  0.0801221  -5.580 5.53e-08 ***
## LABEL_hdShaw HD                               -0.3474333  0.0801221  -4.336 2.00e-05 ***
## LABEL_hdSheridan-Kalorama HD                  -0.8951278  0.0796087 -11.244  < 2e-16 ***
## LABEL_hdSixteenth St HD                       -0.4180164  0.0794368  -5.262 2.77e-07 ***
## LABEL_hdStrivers' Section HD                  -0.4270880  0.0796087  -5.365 1.66e-07 ***
## LABEL_hdTakoma Park HD                        -0.2516734  0.0796087  -3.161 0.001737 ** 
## LABEL_hdWashington Heights HD                 -0.6278169  0.0809705  -7.754 1.52e-13 ***
## LABEL_hdWoodley Park HD                       -0.8786475  0.0801221 -10.966  < 2e-16 ***
## year_hd1980                                    0.0052798  0.0261118   0.202 0.839903    
## year_hd1990                                   -0.0288285  0.0288831  -0.998 0.319059    
## year_hd2000                                   -0.0918110  0.0325945  -2.817 0.005184 ** 
## year_hd2010                                   -0.2015444  0.0343195  -5.873 1.17e-08 ***
## year_hd2020                                   -0.2501095  0.0366804  -6.819 5.34e-11 ***
## treated:LABEL_hdBlagden Alley/Naylor Court HD -0.1297616  0.1128734  -1.150 0.251247    
## treated:LABEL_hdBloomingdale HD               -0.0565204  0.1144571  -0.494 0.621812    
## treated:LABEL_hdCapitol Hill HD               -0.3372164  0.1123406  -3.002 0.002918 ** 
## treated:LABEL_hdCleveland Park HD              0.0222405  0.1124740   0.198 0.843388    
## treated:LABEL_hdDowntown HD                   -0.3557666  0.1135360  -3.134 0.001904 ** 
## treated:LABEL_hdDupont Circle HD               0.0345235  0.1123406   0.307 0.758827    
## treated:LABEL_hdFinancial HD                  -0.3184136  0.1153536  -2.760 0.006142 ** 
## treated:LABEL_hdFoggy Bottom HD               -0.0211810  0.1124740  -0.188 0.850758    
## treated:LABEL_hdFoxhall HD                    -0.0322913  0.1135360  -0.284 0.776296    
## treated:LABEL_hdGreater 14th St HD            -0.1350019  0.1128734  -1.196 0.232655    
## treated:LABEL_hdGreater U St HD               -0.1094277  0.1128734  -0.969 0.333117    
## treated:LABEL_hdGWU / Old West End HD         -0.0163062  0.1144571  -0.142 0.886811    
## treated:LABEL_hdKalorama Triangle HD          -0.2114197  0.1124740  -1.880 0.061149 .  
## treated:LABEL_hdKingman Park HD               -0.0004392  0.1144571  -0.004 0.996941    
## treated:LABEL_hdLafayette Square HD           -0.1489173  0.1192810  -1.248 0.212870    
## treated:LABEL_hdLeDroit Park HD                0.0005028  0.1123406   0.004 0.996432    
## treated:LABEL_hdLogan Circle HD               -0.0939186  0.1123406  -0.836 0.403834    
## treated:LABEL_hdMassachusetts Ave HD           0.0509765  0.1123406   0.454 0.650336    
## treated:LABEL_hdMeridian Hill                 -0.1191043  0.1144571  -1.041 0.298927    
## treated:LABEL_hdMt. Pleasant HD               -0.1499667  0.1124740  -1.333 0.183465    
## treated:LABEL_hdMt. Vernon Square HD          -0.0778555  0.1128734  -0.690 0.490897    
## treated:LABEL_hdPennsylvania Ave NHS          -0.1103306  0.1128734  -0.977 0.329150    
## treated:LABEL_hdShaw HD                       -0.1037016  0.1128734  -0.919 0.358994    
## treated:LABEL_hdSheridan-Kalorama HD          -0.0023706  0.1124740  -0.021 0.983199    
## treated:LABEL_hdSixteenth St HD               -0.2649554  0.1123406  -2.359 0.019012 *  
## treated:LABEL_hdStrivers' Section HD          -0.1788848  0.1124740  -1.590 0.112822    
## treated:LABEL_hdTakoma Park HD                -0.1345343  0.1124740  -1.196 0.232620    
## treated:LABEL_hdWashington Heights HD         -0.0898832  0.1135360  -0.792 0.429199    
## treated:LABEL_hdWoodley Park HD               -0.0329411  0.1128734  -0.292 0.770617    
## treated:desig_yet_hd                          -0.0534302  0.0328640  -1.626 0.105079    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1376 on 290 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.861,  Adjusted R-squared:  0.8294 
## F-statistic: 27.22 on 66 and 290 DF,  p-value: < 2.2e-16
## 
## [1] "__________________________________"
## [1] "D-in-D regression for the % of white residents, NOT weighted by HD population"
## 
## Call:
## lm(formula = percent ~ treated + desig_yet_hd + treated:LABEL_hd + 
##     treated:desig_yet_hd + LABEL_hd + year_hd, data = comp_df[comp_df$group == 
##     "white", ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39134 -0.07594  0.00726  0.07632  0.30179 
## 
## Coefficients:
##                                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                   -0.003230   0.057244  -0.056 0.955047    
## treated                                       -0.033886   0.079576  -0.426 0.670543    
## desig_yet_hd                                   0.029134   0.029709   0.981 0.327581    
## LABEL_hdBlagden Alley/Naylor Court HD          0.244440   0.075880   3.221 0.001421 ** 
## LABEL_hdBloomingdale HD                        0.105744   0.077794   1.359 0.175114    
## LABEL_hdCapitol Hill HD                        0.234965   0.075231   3.123 0.001969 ** 
## LABEL_hdCleveland Park HD                      0.817124   0.075393  10.838  < 2e-16 ***
## LABEL_hdDowntown HD                            0.277206   0.076683   3.615 0.000354 ***
## LABEL_hdDupont Circle HD                       0.704321   0.075231   9.362  < 2e-16 ***
## LABEL_hdFinancial HD                           0.416561   0.075393   5.525 7.33e-08 ***
## LABEL_hdFoggy Bottom HD                        0.776604   0.075393  10.301  < 2e-16 ***
## LABEL_hdFoxhall HD                             0.846157   0.076683  11.034  < 2e-16 ***
## LABEL_hdGreater 14th St HD                     0.337076   0.075880   4.442 1.27e-05 ***
## LABEL_hdGreater U St HD                        0.163839   0.075880   2.159 0.031655 *  
## LABEL_hdGWU / Old West End HD                  0.795304   0.077794  10.223  < 2e-16 ***
## LABEL_hdKalorama Triangle HD                   0.529399   0.075393   7.022 1.56e-11 ***
## LABEL_hdKingman Park HD                        0.042462   0.077794   0.546 0.585609    
## LABEL_hdLafayette Square HD                    0.669767   0.075231   8.903  < 2e-16 ***
## LABEL_hdLeDroit Park HD                        0.093502   0.075231   1.243 0.214921    
## LABEL_hdLogan Circle HD                        0.306925   0.075231   4.080 5.83e-05 ***
## LABEL_hdMassachusetts Ave HD                   0.796135   0.075231  10.583  < 2e-16 ***
## LABEL_hdMeridian Hill                          0.266024   0.077794   3.420 0.000717 ***
## LABEL_hdMt. Pleasant HD                        0.282041   0.075393   3.741 0.000221 ***
## LABEL_hdMt. Vernon Square HD                   0.140268   0.075880   1.849 0.065539 .  
## LABEL_hdPennsylvania Ave NHS                   0.308321   0.075880   4.063 6.23e-05 ***
## LABEL_hdShaw HD                                0.235056   0.075880   3.098 0.002141 ** 
## LABEL_hdSheridan-Kalorama HD                   0.797415   0.075393  10.577  < 2e-16 ***
## LABEL_hdSixteenth St HD                        0.275655   0.075231   3.664 0.000295 ***
## LABEL_hdStrivers' Section HD                   0.253534   0.075393   3.363 0.000875 ***
## LABEL_hdTakoma Park HD                         0.171260   0.075393   2.272 0.023847 *  
## LABEL_hdWashington Heights HD                  0.480371   0.076683   6.264 1.35e-09 ***
## LABEL_hdWoodley Park HD                        0.790666   0.075880  10.420  < 2e-16 ***
## year_hd1980                                   -0.039272   0.024729  -1.588 0.113357    
## year_hd1990                                   -0.024129   0.027354  -0.882 0.378443    
## year_hd2000                                   -0.015156   0.030869  -0.491 0.623805    
## year_hd2010                                    0.091234   0.032502   2.807 0.005339 ** 
## year_hd2020                                    0.055787   0.034738   1.606 0.109377    
## treated:LABEL_hdBlagden Alley/Naylor Court HD  0.069584   0.106897   0.651 0.515600    
## treated:LABEL_hdBloomingdale HD                0.069967   0.108397   0.645 0.519129    
## treated:LABEL_hdCapitol Hill HD                0.341861   0.106392   3.213 0.001461 ** 
## treated:LABEL_hdCleveland Park HD             -0.003632   0.106519  -0.034 0.972826    
## treated:LABEL_hdDowntown HD                    0.075723   0.107524   0.704 0.481850    
## treated:LABEL_hdDupont Circle HD              -0.015558   0.106392  -0.146 0.883841    
## treated:LABEL_hdFinancial HD                   0.387904   0.109246   3.551 0.000448 ***
## treated:LABEL_hdFoggy Bottom HD                0.024415   0.106519   0.229 0.818866    
## treated:LABEL_hdFoxhall HD                     0.052707   0.107524   0.490 0.624369    
## treated:LABEL_hdGreater 14th St HD             0.149879   0.106897   1.402 0.161958    
## treated:LABEL_hdGreater U St HD                0.137578   0.106897   1.287 0.199115    
## treated:LABEL_hdGWU / Old West End HD          0.031011   0.108397   0.286 0.775012    
## treated:LABEL_hdKalorama Triangle HD           0.271883   0.106519   2.552 0.011210 *  
## treated:LABEL_hdKingman Park HD                0.015484   0.108397   0.143 0.886514    
## treated:LABEL_hdLafayette Square HD            0.180526   0.112965   1.598 0.111117    
## treated:LABEL_hdLeDroit Park HD                0.009244   0.106392   0.087 0.930819    
## treated:LABEL_hdLogan Circle HD                0.118472   0.106392   1.114 0.266401    
## treated:LABEL_hdMassachusetts Ave HD          -0.058986   0.106392  -0.554 0.579718    
## treated:LABEL_hdMeridian Hill                  0.151054   0.108397   1.394 0.164526    
## treated:LABEL_hdMt. Pleasant HD                0.148040   0.106519   1.390 0.165655    
## treated:LABEL_hdMt. Vernon Square HD           0.081835   0.106897   0.766 0.444567    
## treated:LABEL_hdPennsylvania Ave NHS           0.179568   0.106897   1.680 0.094067 .  
## treated:LABEL_hdShaw HD                        0.059330   0.106897   0.555 0.579309    
## treated:LABEL_hdSheridan-Kalorama HD           0.022417   0.106519   0.210 0.833460    
## treated:LABEL_hdSixteenth St HD                0.286194   0.106392   2.690 0.007560 ** 
## treated:LABEL_hdStrivers' Section HD           0.254766   0.106519   2.392 0.017405 *  
## treated:LABEL_hdTakoma Park HD                 0.148948   0.106519   1.398 0.163084    
## treated:LABEL_hdWashington Heights HD          0.142159   0.107524   1.322 0.187172    
## treated:LABEL_hdWoodley Park HD                0.033545   0.106897   0.314 0.753891    
## treated:desig_yet_hd                           0.060813   0.031124   1.954 0.051675 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1303 on 290 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.8494, Adjusted R-squared:  0.8151 
## F-statistic: 24.79 on 66 and 290 DF,  p-value: < 2.2e-16